##### algorithms to be used in deterministic satellite-debris model dynamic programming simulation script

### open access policy algorithm - full serial (still fast - just rootfinding with no integration)
oapolicy <- function(igrid,fe_eqm,t,p,F,...) {
ptm <- proc.time()
	launches <- rep(0,length=dim(igrid)[1])
	fe_eqm_vec <- rep(fe_eqm,length=dim(igrid)[1])
	p_vec <- rep(p,length=dim(igrid)[1])
	F_vec <- rep(F,length=dim(igrid)[1])
	for(j in 1:dim(igrid)[1]) {
		X <- uniroot.all(eqmcond,c(0,1e+15),S=igrid$sats[j],D=igrid$debs[j],fe_eqm=fe_eqm)
		launches[j] <- ifelse(length(X)==0,0,X)
	}

	launches[which(launches=="Inf")] <- -1
	fleet_size <- S_(launches,igrid$sats,igrid$debs)

	loss <- L(S_(launches,igrid$sats,igrid$debs),D_(launches,igrid$sats,igrid$debs))
	results <- as.data.frame(cbind(igrid,launches,fleet_size,loss,fe_eqm_vec))
	colnames(results) <- c("satellites","debris","oa_launch_pfn","oa_fleet_size","loss_next","fe_eqm")
	return(results)
}

### Computes backwards induction from given terminal value with given policy to get value function
policy_eval_BI <- function(igrid,launch_policy,value_fn,T,tps_model,p_t,F_t,asats_t) {
	count <- 0
	p_vec <- rep(p_t,length=T)
	F_vec <- rep(F_t,length=T)
	asats_vec <- rep(0,length=T) 
	tot.time <- proc.time()[3]
	tps_x <- as.matrix(cbind(igrid$sats,igrid$debs))
	## make progress bar
	BIpb <- progress_bar$new(format="Doing backwards induction [:bar] :percent",total=(T+1))
	BIpb$tick(0)
	while(count<=T) {
		value_fn <- foreach(i=1:length(launch_policy), .export=ls(), .combine=rbind, .inorder=TRUE) %dopar% {
			result <- fleet_preval_basic(X=launch_policy[i],S=igrid$sats[i],D=igrid$debs[i],value_fn=value_fn,asats=asats_vec,t=T,p=p_vec,F=F_vec,igrid=igrid,tps_model=tps_model)
			result
		}
		tps_y <- as.matrix(value_fn)
		tps_model <- suppressWarnings(Tps(x=tps_x,Y=tps_y, lambda=0))
		count <- count + 1
		BIpb$tick()
	}
	BIpb$terminate()
	tot.time <- round(round(proc.time()[3] - tot.time,3)/60,3)
	cat(paste0("\nTotal time taken for backwards induction: ", tot.time, " minutes.\n Average time per period: ", round(tot.time/length(launch_policy),3), " minutes.\n"))
	return(value_fn)
}

### fleet planner VFI algorithm: solve for optimal policy
dynamic_vfi_solver <- function(panel,igrid,p,F,...) {
	# initialize hyperparameters
	panrows <- nrow(panel)
	gridmin <- min(igrid)
	Sgridmax <- max(igrid$sats)
	Dgridmax <- max(igrid$debs)
	n_grid_points <- length(unique(igrid$sats))*length(unique(igrid$debs))
	S_base_grid <- unique(igrid$sats)
	D_base_grid <- unique(igrid$debs)

	# initialize output objects
	newX <- rep(-1,length=panrows)
	result <- cbind(newX,newX,new)
	# initialize epsilon-delta and count
	epsilon <- 1e-5
	delta_old <- 0
	delta <- epsilon + 10
	delta2 <- 25
	count <- 0
	cat(paste("\n Stopping criteria: distance < ", epsilon, "\n", sep=""))

	# solver loop
	while(delta > epsilon) {
		t.tm <- proc.time()
		## maximization step
		cat(paste0("\n Starting maximization step, grid size = ", n_grid_points,": \n","."))
		m.tm <- proc.time()

		result <- foreach(k=1:panrows, .export=ls(), .combine=rbind, .inorder=TRUE) %dopar% {
			ctm <- proc.time()[3]
			ulim <- eqm_launch_rate(S=panel$S[k], D=panel$D[k], fe_eqm=fe_eqm)+1
			## use numerical optimizer to find optimal X from value fn
			solution_full <- optim(par=panel$X[k], fn=fleet_preval_basic, S=panel$S[k], D=panel$D[k], value_fn=panel$V, p=p, F=F, igrid=igrid, control=list(fnscale=-1), method="L-BFGS-B", lower=0, upper=ulim)
			solution <- solution_full$par
			vfn <- solution_full$value

			if(length(solution)==0) {solution <- 0}
			values <- fleet_preval_basic(X=solution,S=panel$S[k],D=panel$D[k],value_fn=panel$V,p=p,F=F,igrid=igrid)
			
			# add 0 to the set of candidate solutions, in case it isn't there
			solution <- c(0,solution)
			values <- c(fleet_preval_basic(X=0,S=panel$S[k],D=panel$D[k],value_fn=panel$V,p=p,F=F,igrid=igrid),values)
			soln_dfrm <- data.frame(policy=solution,value=values)

			# select the candidate solution with the highest value
			best_val <- which.max(soln_dfrm$value)
			launch_rate <- soln_dfrm[best_val,1]
			vfn <- soln_dfrm[best_val,2]

			clock <- proc.time()[3] - ctm
			result <- c(launch_rate,vfn,clock)
			return(result)
		}

		maxim.time <- (proc.time()-m.tm)[3]
		avg_cell_time <- mean(result[,3])
		cat(paste0("Finished. \n Average cell time: ",round(avg_cell_time,3)," seconds.\nTotal grid time: ",round((avg_cell_time*n_grid_points),3)," seconds \n"))
		rownames(newX) <- NULL

		## save new policy and new value
		rownames(result) <- NULL
		newX <- result[,1]
		newV <- result[,2]

		## calculate distance in new value and update
		delta <- max(abs((panel$V-newV)))
		panel$V <- newV
		panel$X <- newX

		## update old delta and calculate percentage change in delta by midpoint method	
		delta2 <- abs( (delta-delta_old)/(0.5*(delta+delta_old)) )
		delta_old <- delta

		## update count
		count <- count+1
	
		## calculate total time
		tot.time <- (proc.time() - t.tm)[3]
		## out
		cat(paste("\n Finished iteration ", count,", distance is ", delta, "\n Stopping criteria: distance < ", epsilon, "\n", sep=""))
	}

	panel$V <- newV

	loss_vec <- L(S_(panel$X,panel$S,panel$D),D_(panel$X,panel$S,panel$D))
	fe_eqm_vec <- rep(fe_eqm,length=nrow(panel))

	output <- as.data.frame(cbind(satellites=panel$S,debris=panel$D,opt_launch_pfn=panel$X,opt_fleet_vfn=panel$V,optimal_fleet_size=S_(panel$X,panel$S,panel$D),loss_next=loss_vec,fe_eqm=fe_eqm_vec))
	colnames(output)[4] <- "opt_fleet_vfn"
	return(output)
}

### algorithm to compute a time path using thin plate spline interpolation of solved policy functions
# S0, D0 are the initial conditions.
tps_path_gen <- function(S0,D0,p,F,policy_path,launchcon_seq,igrid,ncores,linear_policy_interp) {
	times <- seq(from=1,to=T,by=1)	
	sat_seq <- rep(0,length=T)
	deb_seq <- rep(0,length=T)
	profit_seq <- rep(0,length=T)
	discounted_profit_seq <- rep(0,length=T)
	fleet_npv_path <- rep(0,length=T)
	runaway <- vector(mode="character",length=T)
	kessler <- vector(mode="character",length=T)
	
	X <- rep(-1,length=T)

	if(length(launchcon_seq)==-1) {launchcon_seq <- rep(upper,length=T)} # -1 is a flag to set the constraint large enough that it never binds

	sat_seq[1] <- S0
	deb_seq[1] <- D0
	launch_pfn <- as.vector(policy_path$opt_launch_pfn)
	fleet_vfn <- as.vector(policy_path$opt_fleet_vfn)

	# Thin plate splines to fit the policy and value functions
	s.tm <- proc.time()[3]
	cat(paste0("\nEstimating spline interpolant of policy function..."))
	tps_x <- as.matrix(cbind(current_sats,current_debs))
	tps_y <- as.matrix(launch_pfn)
	ifelse(linear_policy_interp==1,tps_model <- suppressWarnings(Tps(x=tps_x,Y=tps_y,lambda=0)),tps_model <- suppressWarnings(Tps(x=tps_x,Y=tps_y)))
	cat(paste0("\n Done. Total time taken: ",round(proc.time()[3] - s.tm,3)," seconds"))

	cat(paste0("\nEstimating spline interpolants of value functions..."))
	tps_x <- as.matrix(cbind(current_sats,current_debs))
	tps_y <- as.matrix(fleet_vfn[current_cost])
	vfn_tps_model <- suppressWarnings(Tps(x=tps_x,Y=tps_y,lambda=0))
	cat(paste0("\n Done. Total time taken: ",round(proc.time()[3] - s.tm,3)," seconds"))

	s.tm <- proc.time()[3]
	cat(paste0("\nGenerating policy time path..."))
	X[1] <- predict(spline_list[[1]],x=cbind(sat_seq[1],deb_seq[1]))
	X[1] <- ifelse(X[1]<0,0,X[1])
	X[1] <- ifelse(X[1]>launchcon_seq[1],X[1]<-launchcon_seq[1],X[1]<-X[1])
	profit_seq[1] <- one_p_return(X[1],sat_seq[1],p,F)
	discounted_profit_seq[1] <- one_p_return(X[1],sat_seq[1],1,p,F)
	fleet_npv_path[1] <- predict(vfn_tps_model,x=cbind(sat_seq[1],deb_seq[1]))
	ifelse(G(sat_seq[1],deb_seq[1])>d*deb_seq[1], runaway[1] <- "yes", runaway[1] <- "no")
	ifelse(G(0,deb_seq[1])>d*deb_seq[1]*(1 - (1>=(R_start-t0))*R_frac), kessler[1] <- "yes", kessler[1] <- "no")

	for(k in 2:T) {
		current_clock_time <- t0 + k # need to calculate what the time is in the outside world for launch constraint
		## if-else block for Kessler Syndrome, D=1e+6 is an upper bound. If the orbit is unusable (L(0,D)=1), then don't go through this computation and set the launch rate to zero. This is reasonable unless satellites make more than 100% of their total cost to build+launch every period, in which case you would still launch satellites then.
		if(deb_seq[(k-1)]<=1e+15){ 
			sat_seq[k] <- S_(X[(k-1)],sat_seq[(k-1)],deb_seq[(k-1)])
			deb_seq[k] <- D_(X[(k-1)],sat_seq[(k-1)],deb_seq[(k-1)])
			X[k] <- predict(tps_model[[k]],x=cbind(sat_seq[k],deb_seq[k]))
			X[k] <- ifelse(X[k]<0,0,X[k])
			X[k] <- ifelse(X[k]>launchcon_seq[current_clock_time],X[k]<-launchcon_seq[current_clock_time],X[k]<-X[k])
			profit_seq[k] <- one_p_return(X[k],sat_seq[k],k,p,F)
			discounted_profit_seq[k] <- profit_seq[k]*(discount^(times[(k-1)]))
			fleet_npv_path[k] <- predict(vfn_spline_list[[k]],x=cbind(sat_seq[k],deb_seq[k]))
			ifelse(G(sat_seq[k],deb_seq[k])>d*deb_seq[k], runaway[k] <- "yes", runaway[k] <- "no")
			ifelse(G(0,deb_seq[k])>d*deb_seq[k], kessler[k] <- "yes", kessler[k] <- "no")
		}
		if(deb_seq[(k-1)]>1e+15){
			sat_seq[k] <- S_(X[(k-1)],sat_seq[(k-1)],deb_seq[(k-1)])
			ifelse(deb_seq[(k-1)]>=1e+154, deb_seq[k] <- deb_seq[(k-1)], deb_seq[k] <- D_(X[(k-1)],sat_seq[(k-1)],deb_seq[(k-1)],asats_seq[(current_clock_time-1)]) ) # prevent NAs if the debris stock grows uncontrollably
			X[k] <- 0
			profit_seq[k] <- one_p_return(X[k],sat_seq[k],k,p,F)
			discounted_profit_seq[k] <- profit_seq[k]*(discount^(times[(k-1)]))
			fleet_npv_path[k] <- 0
			ifelse(G(sat_seq[k],deb_seq[k])>d*deb_seq[k], runaway[k] <- "yes", runaway[k] <- "no")
			ifelse(G(0,deb_seq[k])>d*deb_seq[k]*(1 - (k>=(R_start-t0))*R_frac), kessler[k] <- "yes", kessler[k] <- "no")
		} 
	}
	cat(paste0("\n Done. Total time taken: ",round(proc.time()[3] - s.tm,3)," seconds"))
	deb_seq[is.na(deb_seq)] <- max(!is.na(deb_seq))
	profit_seq[T] <- one_p_return(X[T],sat_seq[T],T,p,F)
	losses <- L(sat_seq,deb_seq)
	values <- data.frame(time=times,launches=X,satellites=sat_seq,debris=deb_seq ,runaway=runaway,kessler=kessler,fleet_flowv=profit_seq,fleet_pv=discounted_profit_seq,fleet_vfn_path=fleet_npv_path,collision_rate=losses, stringsAsFactors=FALSE)

	return(values)
}

##### Miscellanous helper functions

# Fleet pre-value function with hand-coded 2D interpolation. Potentially a little less accurate than a call to tps_model, but allows for much faster interpolant estimation.
fleet_preval_basic <- function(X,S,D,value_fn,p,F,igrid,...) {
	S_next <- S_(X,S,D)
	D_next <- D_(X,S,D)
	next_state <- c(S_next,D_next)
	gridmax <- max(igrid)
	interpolation <- interpolate(target=next_state,grid=igrid,values=value_fn)
	ifelse(next_state[2]>gridmax||L(next_state[1],next_state[2])==1,interpolation<-0,interpolation<-interpolation)
	prof <- one_p_return(X,S,p,F) + discount*interpolation
	return(prof)
}

# Plot policy and value functions on a given grid
plot_pfn_vfn <- function(vfn,launch_pfn,Sbasegrid,Dbasegrid,labels) {
	fv_mat <- t(matrix(vfn,nrow=length(Sbasegrid),ncol=length(Dbasegrid)))
	l_po_mat <- t(matrix(launch_pfn,nrow=length(Sbasegrid),ncol=length(Dbasegrid)))

	image2D(z=fv_mat,x=Dbasegrid,y=Sbasegrid,xlab=c("Debris"),ylab=c("Satellites"),col=plasma(n=100),contour=TRUE,main=labels[1])
	image2D(z=l_po_mat,x=Dbasegrid,y=Sbasegrid,xlab=c("Debris"),ylab=c("Satellites"),col=plasma(n=100), main=labels[2])
}

# function to translate [a,b] to [-1,1]; a = output_range[1], b = output_range[2]
translate <- function(input,output_range) {
	a <- output_range[1]
	b <- output_range[2]
	translated <- 2*((input - a)/(b - a)) - 1
	return(translated)
}

# function to untranslate [-1,1] to [a,b]; a = output_range[1], b = output_range[2]
untranslate <- function(input,output_range) {
	a <- output_range[1]
	b <- output_range[2]
	untranslated <- a + ((b - a)/2)*(input + 1)
	return(untranslated)
}

# function to redo a grid to be Chebyshev nodes
make_cheby <- function(input) {
	a <- min(input)
	b <- max(input)
	n <- length(input)
	cheby_nodes <- rep(-1,length=n)
	for(k in 1:n) {
		x <- k/n - 1/(2*n)
		y <- pi/(2*n)
		cheby_nodes[k] <- 0.5*(a+b) + 0.5*(b-a)*sec(y)*cospi(x)
	}
	cheby_nodes <- sort(cheby_nodes)
	return(cheby_nodes)
}

build_grid <- function(gridmin, Sgridmax, Dgridmax, Sgridlength, Dgridlength, cheby) {
	S_base_piece <- seq(from=gridmin, to=Sgridmax, length.out=Sgridlength)
	D_base_piece <- seq(from=gridmin, to=Dgridmax, length.out=Dgridlength)
	ifelse(cheby==1, S_base_piece<-make_cheby(S_base_piece), S_base_piece<-S_base_piece)
	ifelse(cheby==1, D_base_piece<-make_cheby(D_base_piece), D_base_piece<-D_base_piece)
	sats <- S_base_piece
	debs <- D_base_piece
	igrid <- as.data.frame(expand.grid(sats,debs))
	colnames(igrid) <- c("sats","debs")
	return(list(S_base_piece=S_base_piece,D_base_piece=D_base_piece,igrid=igrid))
}

grid_to_panel <- function(gridlist,launch_pguess,contval) {
	S_base_piece <- gridlist$S_base_piece
	D_base_piece <- gridlist$D_base_piece
	igrid <- gridlist$igrid

	# initialize empty matrix for panel
	pancols <- 4
	panrows <- dim(igrid)[1]
	panel <- data.frame(matrix(0,nrow=panrows,ncol=pancols))
	colnames(panel)[1:4] <- c("V","S","D","X")

	# satellite-debris grid parameters and assignments for panel
	n_grid_points <- length(S_base_piece)*length(D_base_piece)
	panel$S <- igrid$sats
	panel$D <- igrid$debs
	panel$X <- as.vector(launch_pguess)
	panel$V <- as.vector(contval)

	return(panel)
}

# 2D interpolation function: interpolate the supplied values between the two grid points nearest the target in each dimension
# target: a 1x2 vector
# grid: a nx2 vector
# values: a nx1 vector
interpolate <- function(target,grid,values) {
		# the following lines allow for rectangular grids - more points in one variable than another
		sat_grid_length <- length(unique(grid$sats))
		deb_grid_length <- length(unique(grid$debs))

		## S grid
		S_node_distances <- abs(target[1]-grid$sats)
		#S_nearest_node_idx <- sort(S_node_distances,index.return=TRUE)$ix[seq(1,2*sgl)]
		S_nearest_node_idx <- sort(S_node_distances,index.return=TRUE)$ix[seq(1,2*deb_grid_length)]
		## D_grid
		D_node_distances <- abs(target[2]-grid$debs)
		#D_nearest_node_idx <- sort(D_node_distances,index.return=TRUE)$ix[seq(1,2*sgl)]
		D_nearest_node_idx <- sort(D_node_distances,index.return=TRUE)$ix[seq(1,2*sat_grid_length)]
		
		# locate nearest S and D
		S_nearest_nodes <- grid$sats[c(S_nearest_node_idx[1],S_nearest_node_idx[deb_grid_length+1])]
		nearest_S_dist <- S_node_distances[c(S_nearest_node_idx[1],S_nearest_node_idx[deb_grid_length+1])]

		D_nearest_nodes <- grid$debs[sort(c(D_nearest_node_idx[1],D_nearest_node_idx[sat_grid_length+1]))]
		nearest_D_dist <- D_node_distances[sort(c(D_nearest_node_idx[1],D_nearest_node_idx[sat_grid_length+1]))]
		
		# "old" scheme: seemed to work ok...?
		S_node_wts <- c(0,0)
		D_node_wts <- c(0,0)
		S_node_wts[1] <- nearest_S_dist[2]/(nearest_S_dist[1]+nearest_S_dist[2])
		S_node_wts[2] <- nearest_S_dist[1]/(nearest_S_dist[1]+nearest_S_dist[2])
		D_node_wts[1] <- nearest_D_dist[2]/(nearest_D_dist[1]+nearest_D_dist[2])
		D_node_wts[2] <- nearest_D_dist[1]/(nearest_D_dist[1]+nearest_D_dist[2])
		nearby_values <- values[intersect(S_nearest_node_idx,D_nearest_node_idx)]
		S_comp_1 <- S_node_wts[1]*nearby_values[1] + S_node_wts[2]*nearby_values[3]
		S_comp_2 <- S_node_wts[1]*nearby_values[2] + S_node_wts[2]*nearby_values[4]
		interpolation <- D_node_wts[1]*S_comp_1 + D_node_wts[2]*S_comp_2

		return(interpolation)
}

